In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

File import

In [3]:
# Load the air-quality workbook (expected next to this notebook) into a DataFrame.
data = pd.read_excel(io="AirQuality.xlsx")
In [4]:
# Preview the first ten rows to check the columns and spot missing values.
data.head(n=10)
Out[4]:
Country State city place lastupdate Avg Max Min Pollutants
0 India Andhra_Pradesh Amaravati Secretariat, Amaravati - APPCB 21-12-2018 03:00:00 70.0 108.0 42.0 PM2.5
1 India Andhra_Pradesh Amaravati Secretariat, Amaravati - APPCB 21-12-2018 03:00:00 76.0 102.0 43.0 PM10
2 India Andhra_Pradesh Amaravati Secretariat, Amaravati - APPCB 21-12-2018 03:00:00 73.0 118.0 46.0 NO2
3 India Andhra_Pradesh Amaravati Secretariat, Amaravati - APPCB 21-12-2018 03:00:00 5.0 6.0 4.0 NH3
4 India Andhra_Pradesh Amaravati Secretariat, Amaravati - APPCB 21-12-2018 03:00:00 41.0 109.0 2.0 SO2
5 India Andhra_Pradesh Amaravati Secretariat, Amaravati - APPCB 21-12-2018 03:00:00 44.0 102.0 18.0 CO
6 India Andhra_Pradesh Amaravati Secretariat, Amaravati - APPCB 21-12-2018 03:00:00 29.0 35.0 12.0 OZONE
7 India Andhra_Pradesh Rajamahendravaram Anand Kala Kshetram, Rajamahendravaram - APPCB 21-12-2018 03:00:00 NaN NaN NaN PM2.5
8 India Andhra_Pradesh Rajamahendravaram Anand Kala Kshetram, Rajamahendravaram - APPCB 21-12-2018 03:00:00 NaN NaN NaN PM10
9 India Andhra_Pradesh Rajamahendravaram Anand Kala Kshetram, Rajamahendravaram - APPCB 21-12-2018 03:00:00 NaN NaN NaN NO2
In [5]:
# Remove the timestamp column, which none of the plots in this notebook use.
# `drop(..., errors='ignore')` keeps the cell safe to re-run: the previous
# `del data['lastupdate']` raised KeyError once the column was already gone.
data = data.drop(columns=['lastupdate'], errors='ignore')
data.head()
Out[5]:
Country State city place Avg Max Min Pollutants
0 India Andhra_Pradesh Amaravati Secretariat, Amaravati - APPCB 70.0 108.0 42.0 PM2.5
1 India Andhra_Pradesh Amaravati Secretariat, Amaravati - APPCB 76.0 102.0 43.0 PM10
2 India Andhra_Pradesh Amaravati Secretariat, Amaravati - APPCB 73.0 118.0 46.0 NO2
3 India Andhra_Pradesh Amaravati Secretariat, Amaravati - APPCB 5.0 6.0 4.0 NH3
4 India Andhra_Pradesh Amaravati Secretariat, Amaravati - APPCB 41.0 109.0 2.0 SO2

Plotting avg, min and max pollution

In [6]:
# Line plot of the 'Avg' column; the x-axis is the DataFrame's row index.
ax = plt.gca()
ax.plot(data.loc[:, 'Avg'])
ax.set_xlabel('cities')
ax.set_ylabel('amount')
ax.set_title('Average Pollution Data')
Out[6]:
Text(0.5, 1.0, 'Average Pollution Data')
In [7]:
# Line plot of the 'Max' column; the x-axis is the DataFrame's row index.
ax = plt.gca()
ax.plot(data.loc[:, 'Max'])
ax.set_xlabel('cities')
ax.set_ylabel('amount')
ax.set_title('Maximum Pollution Data')
Out[7]:
Text(0.5, 1.0, 'Maximum Pollution Data')
In [8]:
# Line plot of the 'Min' column; the x-axis is the DataFrame's row index.
ax = plt.gca()
ax.plot(data.loc[:, 'Min'])
ax.set_xlabel('cities')
ax.set_ylabel('amount')
ax.set_title('Minimum Pollution Data')
Out[8]:
Text(0.5, 1.0, 'Minimum Pollution Data')

Checking which state has the most pollution measurements

In [9]:
# Bar chart of how many rows (measurements) each state contributes to the data.
plt.figure(dpi=80, figsize=(20, 10))
state_axes = sns.countplot(data=data, x='State')
state_axes.set_xlabel('State')
plt.tight_layout()

Plotting Min, Max and Avg Pollution (pollutant-wise)

In [10]:
# Keep only the PM2.5 rows and draw their Max/Avg/Min readings as three lines.
# The x-axis is the original row index of the selected rows.
data_p1 = data.loc[data['Pollutants'] == 'PM2.5']
ax = data_p1.loc[:, ['Max', 'Avg', 'Min']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('amount')
ax.set_title('PM2.5')
Out[10]:
Text(0.5, 1.0, 'PM2.5')
In [11]:
# Keep only the PM10 rows and draw their Max/Avg/Min readings as three lines.
# The x-axis is the original row index of the selected rows.
data_p2 = data.loc[data['Pollutants'] == 'PM10']
ax = data_p2.loc[:, ['Max', 'Avg', 'Min']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('amount')
ax.set_title('PM10')
Out[11]:
Text(0.5, 1.0, 'PM10')
In [12]:
# Keep only the NO2 rows and draw their Max/Avg/Min readings as three lines.
# The x-axis is the original row index of the selected rows.
data_p3 = data.loc[data['Pollutants'] == 'NO2']
ax = data_p3.loc[:, ['Max', 'Avg', 'Min']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('amount')
ax.set_title('NO2')
Out[12]:
Text(0.5, 1.0, 'NO2')
In [13]:
# Keep only the NH3 rows and draw their Max/Avg/Min readings as three lines.
# The x-axis is the original row index of the selected rows.
data_p4 = data.loc[data['Pollutants'] == 'NH3']
ax = data_p4.loc[:, ['Max', 'Avg', 'Min']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('amount')
ax.set_title('NH3')
Out[13]:
Text(0.5, 1.0, 'NH3')
In [14]:
# Keep only the SO2 rows and draw their Max/Avg/Min readings as three lines.
# The x-axis is the original row index of the selected rows.
data_p5 = data.loc[data['Pollutants'] == 'SO2']
ax = data_p5.loc[:, ['Max', 'Avg', 'Min']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('amount')
ax.set_title('SO2')
Out[14]:
Text(0.5, 1.0, 'SO2')
In [15]:
# Keep only the CO rows and draw their Max/Avg/Min readings as three lines.
# The x-axis is the original row index of the selected rows.
data_p6 = data.loc[data['Pollutants'] == 'CO']
ax = data_p6.loc[:, ['Max', 'Avg', 'Min']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('amount')
ax.set_title('CO')
Out[15]:
Text(0.5, 1.0, 'CO')
In [16]:
# Keep only the OZONE rows and draw their Max/Avg/Min readings as three lines.
# The x-axis is the original row index of the selected rows.
data_p7 = data.loc[data['Pollutants'] == 'OZONE']
ax = data_p7.loc[:, ['Max', 'Avg', 'Min']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('amount')
ax.set_title('OZONE')
Out[16]:
Text(0.5, 1.0, 'OZONE')

Plotting Min, Max and Avg (state-wise)

In [18]:
# List the distinct states in the data set. Each state keeps the row label of
# its first occurrence, so the index also shows where that state's rows begin.
# Uses the `pd` alias imported at the top instead of a mid-notebook import,
# and calls drop_duplicates as a regular method on the frame.
df = pd.DataFrame(data['State'])
df.drop_duplicates()
Out[18]:
State
0 Andhra_Pradesh
33 Bihar
47 Delhi
286 Gujarat
291 Haryana
322 Jharkhand
327 Karnataka
393 Kerala
400 Madhya Pradesh
439 Maharashtra
498 Odisha
511 Punjab
560 Rajasthan
629 TamilNadu
644 Telangana
684 Uttar_Pradesh
774 West_Bengal
In [19]:
# Rows for Andhra_Pradesh only, with Min/Avg/Max drawn as three lines.
# The x-axis is the original row index of the selected rows.
data_state1 = data.loc[data['State'] == 'Andhra_Pradesh']
ax = data_state1.loc[:, ['Min', 'Avg', 'Max']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('Amount')
ax.set_title('Andhra_Pradesh')
Out[19]:
Text(0.5, 1.0, 'Andhra_Pradesh')
In [20]:
# Rows for Bihar only, with Min/Avg/Max drawn as three lines.
# The x-axis is the original row index of the selected rows.
data_state2 = data.loc[data['State'] == 'Bihar']
ax = data_state2.loc[:, ['Min', 'Avg', 'Max']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('Amount')
ax.set_title('Bihar')
Out[20]:
Text(0.5, 1.0, 'Bihar')
In [21]:
# Rows for Delhi only, with Min/Avg/Max drawn as three lines.
# The x-axis is the original row index of the selected rows.
data_state3 = data.loc[data['State'] == 'Delhi']
ax = data_state3.loc[:, ['Min', 'Avg', 'Max']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('Amount')
ax.set_title('Delhi')
Out[21]:
Text(0.5, 1.0, 'Delhi')
In [22]:
# Rows for Gujarat only, with Min/Avg/Max drawn as three lines.
# The x-axis is the original row index of the selected rows.
data_state4 = data.loc[data['State'] == 'Gujarat']
ax = data_state4.loc[:, ['Min', 'Avg', 'Max']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('Amount')
ax.set_title('Gujarat')
Out[22]:
Text(0.5, 1.0, 'Gujarat')
In [23]:
# Rows for Haryana only, with Min/Avg/Max drawn as three lines.
# The x-axis is the original row index of the selected rows.
data_state5 = data.loc[data['State'] == 'Haryana']
ax = data_state5.loc[:, ['Min', 'Avg', 'Max']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('Amount')
ax.set_title('Haryana')
Out[23]:
Text(0.5, 1.0, 'Haryana')
In [24]:
# Rows for Jharkhand only, with Min/Avg/Max drawn as three lines.
# The x-axis is the original row index of the selected rows.
data_state6 = data.loc[data['State'] == 'Jharkhand']
ax = data_state6.loc[:, ['Min', 'Avg', 'Max']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('Amount')
ax.set_title('Jharkhand')
Out[24]:
Text(0.5, 1.0, 'Jharkhand')
In [25]:
# Rows for Karnataka only, with Min/Avg/Max drawn as three lines.
# The x-axis is the original row index of the selected rows.
data_state7 = data.loc[data['State'] == 'Karnataka']
ax = data_state7.loc[:, ['Min', 'Avg', 'Max']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('Amount')
ax.set_title('Karnataka')
Out[25]:
Text(0.5, 1.0, 'Karnataka')
In [26]:
# Rows for Kerala only, with Min/Avg/Max drawn as three lines.
# The x-axis is the original row index of the selected rows.
data_state8 = data.loc[data['State'] == 'Kerala']
ax = data_state8.loc[:, ['Min', 'Avg', 'Max']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('Amount')
ax.set_title('Kerala')
Out[26]:
Text(0.5, 1.0, 'Kerala')
In [27]:
# Rows for Madhya Pradesh only, with Min/Avg/Max drawn as three lines.
# The x-axis is the original row index of the selected rows.
data_state9 = data.loc[data['State'] == 'Madhya Pradesh']
ax = data_state9.loc[:, ['Min', 'Avg', 'Max']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('Amount')
ax.set_title('Madhya Pradesh')
Out[27]:
Text(0.5, 1.0, 'Madhya Pradesh')
In [28]:
# Rows for Maharashtra only, with Min/Avg/Max drawn as three lines.
# The x-axis is the original row index of the selected rows.
data_state10 = data.loc[data['State'] == 'Maharashtra']
ax = data_state10.loc[:, ['Min', 'Avg', 'Max']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('Amount')
ax.set_title('Maharashtra')
Out[28]:
Text(0.5, 1.0, 'Maharashtra')
In [30]:
# Rows for Odisha only, with Min/Avg/Max drawn as three lines.
# The x-axis is the original row index of the selected rows.
data_state11 = data.loc[data['State'] == 'Odisha']
ax = data_state11.loc[:, ['Min', 'Avg', 'Max']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('Amount')
ax.set_title('Odisha')
Out[30]:
Text(0.5, 1.0, 'Odisha')
In [31]:
# Rows for Punjab only, with Min/Avg/Max drawn as three lines.
# The x-axis is the original row index of the selected rows.
data_state12 = data.loc[data['State'] == 'Punjab']
ax = data_state12.loc[:, ['Min', 'Avg', 'Max']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('Amount')
ax.set_title('Punjab')
Out[31]:
Text(0.5, 1.0, 'Punjab')
In [32]:
# Rows for Rajasthan only, with Min/Avg/Max drawn as three lines.
# The x-axis is the original row index of the selected rows.
data_state13 = data.loc[data['State'] == 'Rajasthan']
ax = data_state13.loc[:, ['Min', 'Avg', 'Max']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('Amount')
ax.set_title('Rajasthan')
Out[32]:
Text(0.5, 1.0, 'Rajasthan')
In [33]:
# Rows for TamilNadu only, with Min/Avg/Max drawn as three lines.
# The x-axis is the original row index of the selected rows.
data_state14 = data.loc[data['State'] == 'TamilNadu']
ax = data_state14.loc[:, ['Min', 'Avg', 'Max']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('Amount')
ax.set_title('TamilNadu')
Out[33]:
Text(0.5, 1.0, 'TamilNadu')
In [34]:
# Rows for Telangana only, with Min/Avg/Max drawn as three lines.
# The x-axis is the original row index of the selected rows.
data_state15 = data.loc[data['State'] == 'Telangana']
ax = data_state15.loc[:, ['Min', 'Avg', 'Max']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('Amount')
ax.set_title('Telangana')
Out[34]:
Text(0.5, 1.0, 'Telangana')
In [35]:
# Rows for Uttar_Pradesh only, with Min/Avg/Max drawn as three lines.
# The x-axis is the original row index of the selected rows.
data_state16 = data.loc[data['State'] == 'Uttar_Pradesh']
ax = data_state16.loc[:, ['Min', 'Avg', 'Max']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('Amount')
ax.set_title('Uttar_Pradesh')
Out[35]:
Text(0.5, 1.0, 'Uttar_Pradesh')
In [36]:
# Rows for West_Bengal only, with Min/Avg/Max drawn as three lines.
# The x-axis is the original row index of the selected rows.
data_state17 = data.loc[data['State'] == 'West_Bengal']
ax = data_state17.loc[:, ['Min', 'Avg', 'Max']].plot()
ax.set_xlabel('cities')
ax.set_ylabel('Amount')
ax.set_title('West_Bengal')
Out[36]:
Text(0.5, 1.0, 'West_Bengal')

Plotting Mean Pollutant Amount

In [37]:
# Mean Avg/Max/Min reading for each pollutant, drawn as one line per column.
data_pollu = data.groupby('Pollutants')
# Average only the numeric reading columns: on pandas >= 2.0 a bare
# groupby(...).mean() raises TypeError because of the text columns
# (Country, State, city, place).
pollutant_means = data_pollu[['Avg', 'Max', 'Min']].mean()
plt.figure(figsize=(20, 10), dpi=100)
plt.plot(pollutant_means)
# Take the legend labels from the columns so each line gets its own name;
# the old hard-coded ['Max', 'Avg', 'Mean'] list mislabelled all three lines.
plt.legend(list(pollutant_means.columns))
# The x-axis holds the pollutant names (the group keys), not amounts.
plt.xlabel('Pollutants')
plt.ylabel('Pollutant Amounts')
Out[37]:
Text(0, 0.5, 'Pollutant Amounts')

Plotting Mean State Pollution

In [38]:
# Mean Avg/Max/Min reading per state.
data_states = data.groupby('State')
# Select the numeric reading columns explicitly: on pandas >= 2.0 a bare
# groupby(...).mean() raises TypeError because of the text columns.
data_states[['Avg', 'Max', 'Min']].mean()
Out[38]:
Avg Max Min
State
Andhra_Pradesh 41.321429 64.285714 23.928571
Bihar 121.357143 165.142857 74.071429
Delhi 139.072961 195.892704 80.339056
Gujarat 106.500000 173.250000 47.500000
Haryana 80.653846 143.076923 31.576923
Jharkhand 84.000000 136.200000 42.200000
Karnataka 51.295082 84.606557 23.803279
Kerala 67.000000 97.857143 44.000000
Madhya Pradesh 78.589744 142.974359 31.846154
Maharashtra 62.981132 121.735849 26.528302
Odisha 54.727273 113.090909 26.181818
Punjab 55.938776 89.102041 32.795918
Rajasthan 80.647059 153.220588 28.352941
TamilNadu 36.333333 79.666667 15.200000
Telangana 48.457143 85.228571 23.057143
Uttar_Pradesh 135.633333 194.166667 75.733333
West_Bengal 94.980000 150.780000 46.880000
In [40]:
# Plot the per-state mean of each reading column, one line per column.
# Only the numeric columns are averaged: on pandas >= 2.0 a bare
# groupby(...).mean() raises TypeError because of the text columns.
state_means = data_states[['Avg', 'Max', 'Min']].mean()
plt.figure(figsize=(18, 5), dpi=100)
plt.plot(state_means)
# Take the legend labels from the columns so they follow the plotted order;
# the old hard-coded ['Max', 'Avg', 'Min'] list swapped the Avg and Max lines.
plt.legend(list(state_means.columns))
plt.xlabel('States')
plt.ylabel('Amount')
plt.tight_layout()

Plotting mean City Pollution

In [41]:
# Mean Avg/Max/Min reading per city.
data_city = data.groupby('city')
# Select the numeric reading columns explicitly: on pandas >= 2.0 a bare
# groupby(...).mean() raises TypeError because of the text columns.
data_city[['Avg', 'Max', 'Min']].mean()
Out[41]:
Avg Max Min
city
Agra 105.400000 157.400000 58.800000
Ahmedabad 106.500000 173.250000 47.500000
Ajmer 67.428571 141.428571 25.714286
Alwar 62.571429 114.857143 16.714286
Amaravati 48.285714 82.857143 23.857143
... ... ... ...
Udaipur 72.285714 163.285714 28.428571
Ujjain 84.142857 169.714286 23.428571
Varanasi 157.333333 257.333333 97.333333
Vijayawada 30.500000 44.833333 23.666667
Visakhapatnam 43.833333 62.666667 27.166667

71 rows × 3 columns

In [43]:
# Plot the per-city mean of each reading column, one line per column.
# Only the numeric columns are averaged: on pandas >= 2.0 a bare
# groupby(...).mean() raises TypeError because of the text columns.
city_means = data_city[['Avg', 'Max', 'Min']].mean()
# The former 150x50 inch figure at dpi=100 rendered a 15000x5000 px canvas;
# a wide figure with vertical tick labels is enough to fit every city name.
plt.figure(figsize=(24, 8), dpi=100)
plt.plot(city_means)
plt.legend(list(city_means.columns))
plt.xticks(rotation=90)
plt.xlabel('City')
plt.ylabel('Amount')
plt.tight_layout()
plt.show()
In [ ]: